### Replication file for The Effects of State Coercion on Voting Outcome in Protest Movement ###
### Author: Weiwen Yin, Weidong Huo, Danyang Lin ###


#install.packages("grf")
#install.packages("sandwich")
#install.packages("lmtest")
#install.packages("Hmisc")
#install.packages("ggplot2")
#install.packages("lattice")
#install.packages("zoo")
#install.packages("stargazer")
#install.packages("xtable")
#install.packages("ntile")
#install.packages("boot",dep=TRUE)
#install.packages("ggpubr")
#install.packages("cowplot")


rm(list = ls())
set.seed(100)
library(grf)
if(packageVersion("grf") < '0.10.2') {
  warning("This script requires grf 0.10.2 or higher")
}
library(foreign)
library(interflex)
library(stringi)
library(ggplot2)
library(dplyr)    
library(fBasics)     
library(grf)         
library(devtools)    
library(xtable)
library(aod)
library(tidyr)
library(texreg)
library(sandwich)
library(lmtest)
library(ggpubr)
library(boot)
library(ggpubr)
library(cowplot)

## set root directory of the replication file
path <- ""

## set graph folder (create a folder named Graphs under path)
graphpath<-paste(path,"Graphs/",sep="")

data.raw = read.dta("HK2019analysis_psrm_12.dta")

name<-"HK2019"

Y="dem" 
D="tear" #treatment

Z=c(
  "dem_compete", "logsize", "logdensity", 
  "new_below35", "old_below35", 
  "new_middle", "old_middle",
  "new_above61", "female",
  "dem_incumbent", "manufacture", "construct", "trade", "transport", "service",
  "information", "finance", "professional", "administration", "miscellaneous",
  "mandarin", "house_size", "income", 
  "mortgage", "rent", "floor", 
  "govern_fund", "protest_num")


df = data.raw[,c(Y,D,Z)]
df = na.omit(df)
covariate_names = Z


#### Main Text

### 1.Fit the forest
cf <- causal_forest(
  X = as.matrix(df[,covariate_names]),
  Y = df[,Y],
  W = df[,D],
  num.trees = 5000,
  seed = 2019)


### 2.Predict point estimates and standard errors 
pred <- predict(cf, estimate.variance=TRUE)
tauhat <- pred$predictions
tauhat_se <- sqrt(pred$variance.estimates)


### 3.Average Treatment Effects
ate = average_treatment_effect(cf,target.sample = "all",method = "AIPW")
ate
pvalue.ate = 2*pnorm(-abs(ate[1]/ate[2]))
pvalue.ate


### 4.Generating Figure 1: Distribution for the CATEs of Tear Gas Usage
pdf(paste(graphpath,"Figure 1.pdf",sep=""))
ggplot(data.frame(tauhat),aes(x=tauhat)) + 
  geom_histogram(color="grey",bins=30) + 
  geom_vline(aes(xintercept = ate[1],col="ATE")) + 
  scale_color_manual(name="",values = c(ATE="red")) +
  xlab("Treatment Effects") + 
  ylab("Count")
dev.off()


### 5. Generating Figure 2: 
# Export data to make a map

result <- data.frame(tauhat, tauhat_se)
result$hte_upper=result$tauhat+result$tauhat_se*1.96
result$hte_lower=result$tauhat-result$tauhat_se*1.96
result$significant=1
result$significant[(result$hte_upper>0) & (result$hte_lower<0)]=0
sig.prop = nrow(result[result$significant == 1,])/nrow(result)
sig.prop

map = na.omit(data.raw)

make.map <- data.frame(map$id2019,result$tauhat,result$significant)
names(make.map)[names(make.map) == "data.raw.id2019"] <- "id2019"
names(make.map)[names(make.map) == "tau.hat"] <- "htes"
names(make.map)[names(make.map) == "result.significant"] <- "Significant"
write.csv(make.map,"make_map.csv")


### 6.Generating Table 1: Variable Importance (Full results also reported in Appendix)
var_imp <- c(variable_importance(cf))
names(var_imp) <- covariate_names
sorted_var_imp <- sort(var_imp, decreasing=TRUE)
sorted_var_imp
Table1 <- xtable(data.frame(sorted_var_imp),digits=3)
print(Table1, file = "Table1.tex", compress = FALSE)


### 7.Generating Figure 3: Treatment Effect along Important Moderators

# a.mod_var = old_below35

mod_var = "old_below35"

other_covariates = covariate_names[which(covariate_names != mod_var)]
x = seq(min(df[,mod_var]),max(df[,mod_var]),length=100)
df_newx = setNames(data.frame(x), mod_var)
df_median = data.frame(lapply(df[,other_covariates],median))
df_new = crossing(df_median,df_newx)

## prediction
pred_1mv = predict(cf,newdata=as.matrix(df_new[,covariate_names]),estimate.variance=TRUE)
tauhat_1mv = pred_1mv$predictions
tauhat_1mv_se = sqrt(pred_1mv$variance.estimates)
df_new[,"cate"] = tauhat_1mv
df_new[,"lwr"] = tauhat_1mv-1.96*tauhat_1mv_se
df_new[,"upr"] = tauhat_1mv+1.96*tauhat_1mv_se

## plot HTE w.r.t. the moderating variabel, with confidence interval
pdf(paste(graphpath,"Figure 3 Subfigure a.pdf",sep=""))

p1 <- ggplot(data=df_new,aes_string(x=mod_var,y="cate")) + geom_line(data=df_new,color="black") + 
  geom_ribbon(data=df_new,aes(ymin=lwr,ymax=upr),alpha=0.3) + ylab("Treatment Effect") + 
  xlab("(a) Non-first-time below 35") 

p2 <- ggplot() + geom_density(data=df, aes_string(x=mod_var), alpha=.1, 
                              inherit.aes=FALSE, color="red", linetype = "dashed") +  
  scale_y_continuous(expand = expansion(mult = c(0, 0.05)), position = "right", name = "Density")  +
  theme_half_open(11, rel_small = 1) +
  rremove("x.axis")+
  rremove("xlab") +
  rremove("x.text") +
  rremove("x.ticks") +
  rremove("legend") + 
  rremove("y.axis")

aligned_plots <- align_plots(p1, p2, align="hv", axis="tblr")

ggdraw(aligned_plots[[1]]) + draw_plot(aligned_plots[[2]])
dev.off()


# b.mod_var = "transport"

mod_var = "transport"

other_covariates = covariate_names[which(covariate_names != mod_var)]
x = seq(min(df[,mod_var]),max(df[,mod_var]),length=100)
df_newx = setNames(data.frame(x), mod_var)
df_median = data.frame(lapply(df[,other_covariates],median))
df_new = crossing(df_median,df_newx)

## prediction
pred_1mv = predict(cf,newdata=as.matrix(df_new[,covariate_names]),estimate.variance=TRUE)
tauhat_1mv = pred_1mv$predictions
tauhat_1mv_se = sqrt(pred_1mv$variance.estimates)
df_new[,"cate"] = tauhat_1mv
df_new[,"lwr"] = tauhat_1mv-1.96*tauhat_1mv_se
df_new[,"upr"] = tauhat_1mv+1.96*tauhat_1mv_se

## plot HTE w.r.t. the moderating variabel, with confidence interval
pdf(paste(graphpath,"Figure 3 Subfigure b.pdf",sep=""))
p1 <- ggplot(data=df_new,aes_string(x=mod_var,y="cate")) + geom_line(data=df_new,color="black") + 
  geom_ribbon(data=df_new,aes(ymin=lwr,ymax=upr),alpha=0.3) + ylab("Treatment Effect") + 
  xlab("(b) Transportation") 

p2 <- ggplot() + geom_density(data=df, aes_string(x=mod_var), alpha=.1, 
                              inherit.aes=FALSE, color="red", linetype = "dashed") +  
  scale_y_continuous(expand = expansion(mult = c(0, 0.05)), position = "right", name = "Density")  +
  theme_half_open(11, rel_small = 1) +
  rremove("x.axis")+
  rremove("xlab") +
  rremove("x.text") +
  rremove("x.ticks") +
  rremove("legend") + 
  rremove("y.axis")

aligned_plots <- align_plots(p1, p2, align="hv", axis="tblr")

ggdraw(aligned_plots[[1]]) + draw_plot(aligned_plots[[2]])
dev.off()


# c.mod_var = "trade"

mod_var = "trade"

other_covariates = covariate_names[which(covariate_names != mod_var)]
x = seq(min(df[,mod_var]),max(df[,mod_var]),length=100)
df_newx = setNames(data.frame(x), mod_var)
df_median = data.frame(lapply(df[,other_covariates],median))
df_new = crossing(df_median,df_newx)

## prediction
pred_1mv = predict(cf,newdata=as.matrix(df_new[,covariate_names]),estimate.variance=TRUE)
tauhat_1mv = pred_1mv$predictions
tauhat_1mv_se = sqrt(pred_1mv$variance.estimates)
df_new[,"cate"] = tauhat_1mv
df_new[,"lwr"] = tauhat_1mv-1.96*tauhat_1mv_se
df_new[,"upr"] = tauhat_1mv+1.96*tauhat_1mv_se

## plot HTE w.r.t. the moderating variabel, with confidence interval
pdf(paste(graphpath,"Figure 3 Subfigure c.pdf",sep=""))
p1 <- ggplot(data=df_new,aes_string(x=mod_var,y="cate")) + geom_line(data=df_new,color="black") + 
  geom_ribbon(data=df_new,aes(ymin=lwr,ymax=upr),alpha=0.3) + ylab("Treatment Effect") + 
  xlab("(c) Trade") 

p2 <- ggplot() + geom_density(data=df, aes_string(x=mod_var), alpha=.1, 
                              inherit.aes=FALSE, color="red", linetype = "dashed") +  
  scale_y_continuous(expand = expansion(mult = c(0, 0.05)), position = "right", name = "Density")  +
  theme_half_open(11, rel_small = 1) +
  rremove("x.axis")+
  rremove("xlab") +
  rremove("x.text") +
  rremove("x.ticks") +
  rremove("legend") + 
  rremove("y.axis")

aligned_plots <- align_plots(p1, p2, align="hv", axis="tblr")

ggdraw(aligned_plots[[1]]) + draw_plot(aligned_plots[[2]])
dev.off()


# d.mod_var = "income"

mod_var = "income"

other_covariates = covariate_names[which(covariate_names != mod_var)]
x = seq(min(df[,mod_var]),max(df[,mod_var]),length=100)
df_newx = setNames(data.frame(x), mod_var)
df_median = data.frame(lapply(df[,other_covariates],median))
df_new = crossing(df_median,df_newx)

## prediction
pred_1mv = predict(cf,newdata=as.matrix(df_new[,covariate_names]),estimate.variance=TRUE)
tauhat_1mv = pred_1mv$predictions
tauhat_1mv_se = sqrt(pred_1mv$variance.estimates)
df_new[,"cate"] = tauhat_1mv
df_new[,"lwr"] = tauhat_1mv-1.96*tauhat_1mv_se
df_new[,"upr"] = tauhat_1mv+1.96*tauhat_1mv_se

## plot HTE w.r.t. the moderating variabel, with confidence interval
pdf(paste(graphpath,"Figure 3 Subfigure d.pdf",sep=""))
p1 <- ggplot(data=df_new,aes_string(x=mod_var,y="cate")) + geom_line(data=df_new,color="black") + 
  geom_ribbon(data=df_new,aes(ymin=lwr,ymax=upr),alpha=0.3) + ylab("Treatment Effect") + 
  xlab("(d) Income") 

p2 <- ggplot() + geom_density(data=df, aes_string(x=mod_var), alpha=.1, 
                              inherit.aes=FALSE, color="red", linetype = "dashed") +  
  scale_y_continuous(expand = expansion(mult = c(0, 0.05)), position = "right", name = "Density")  +
  theme_half_open(11, rel_small = 1) +
  rremove("x.axis")+
  rremove("xlab") +
  rremove("x.text") +
  rremove("x.ticks") +
  rremove("legend") + 
  rremove("y.axis")

aligned_plots <- align_plots(p1, p2, align="hv", axis="tblr")

ggdraw(aligned_plots[[1]]) + draw_plot(aligned_plots[[2]])
dev.off()


# e.mod_var = "manufacture"

mod_var = "manufacture"

other_covariates = covariate_names[which(covariate_names != mod_var)]
x = seq(min(df[,mod_var]),max(df[,mod_var]),length=100)
df_newx = setNames(data.frame(x), mod_var)
df_median = data.frame(lapply(df[,other_covariates],median))
df_new = crossing(df_median,df_newx)

## prediction
pred_1mv = predict(cf,newdata=as.matrix(df_new[,covariate_names]),estimate.variance=TRUE)
tauhat_1mv = pred_1mv$predictions
tauhat_1mv_se = sqrt(pred_1mv$variance.estimates)
df_new[,"cate"] = tauhat_1mv
df_new[,"lwr"] = tauhat_1mv-1.96*tauhat_1mv_se
df_new[,"upr"] = tauhat_1mv+1.96*tauhat_1mv_se

## plot HTE w.r.t. the moderating variabel, with confidence interval
pdf(paste(graphpath,"Figure 3 Subfigure e.pdf",sep=""))
p1 <- ggplot(data=df_new,aes_string(x=mod_var,y="cate")) + geom_line(data=df_new,color="black") + 
  geom_ribbon(data=df_new,aes(ymin=lwr,ymax=upr),alpha=0.3) + ylab("Treatment Effect") + 
  xlab("(e) Manufacture") 

p2 <- ggplot() + geom_density(data=df, aes_string(x=mod_var), alpha=.1, 
                              inherit.aes=FALSE, color="red", linetype = "dashed") +  
  scale_y_continuous(expand = expansion(mult = c(0, 0.05)), position = "right", name = "Density")  +
  theme_half_open(11, rel_small = 1) +
  rremove("x.axis")+
  rremove("xlab") +
  rremove("x.text") +
  rremove("x.ticks") +
  rremove("legend") + 
  rremove("y.axis")

aligned_plots <- align_plots(p1, p2, align="hv", axis="tblr")

ggdraw(aligned_plots[[1]]) + draw_plot(aligned_plots[[2]])
dev.off()





#### Appendix 



### Generating Table 2: Summary Statistics 
label <- c(          "Pro-democracy share", "Treatment", 
                     "Competing candidates", "Log constituency size", "Log population density", 
                     "First-time voters below 35", "Non-first-time voters below 35", 
                     "First-time voters 35-60", "Non-first-time voters 35-60",
                     "First-time voters above 61", "Female",
                     "Pro-democracy incumbent", "Manufacturing", "Construction", "Trade", "Transportation", "Service",
                     "Information", "Finance", "Professional", "Administration", "Miscellaneous",
                     "Mandarin speaker", "Household size", "Household income", 
                     "Mortgage-to-income ratio", "Rent-to-income ratio", "Floor area", 
                     "Government funded housing", "No. of confrontations")
library(stargazer)
filename.tex <- paste("Table2_appendix.tex")

stargazer(df,
          title="Summary Statistics",
          covariate.labels=label,out=filename.tex)



### Generating Table 3: OLS Regression Results

covariate.matrix <- df[-(1:2)]
covariate.names <- colnames(covariate.matrix) 
sumx <- paste(covariate.names, collapse = " + ")  # "X1 + X2 + X3 + ..." for substitution later
ols.nointeract <- paste("dem", paste("tear + (", sumx, ") ", sep=""), sep=" ~ ")
ols.interaction <- paste("dem", paste("tear * (", sumx, ") ", sep=""), sep=" ~ ")
ols.interaction <- as.formula(ols.interaction)
ols.interaction
lm.ols.nointeract <- lm(ols.nointeract, data=df)
summary(lm.ols.nointeract)
lm.ols.interact <- lm(ols.interaction, data=df)
summary(lm.ols.interact)

covariate.label <- c("Tear gas usage", "Competing candidates", "Log constituency size", "Log population density", 
                     "First-time voters below 35", "Non-first-time voters below 35", 
                     "First-time voters 35-60", "Non-first-time voters 35-60",
                     "First-time voters above 61", "Female",
                     "Pro-democracy incumbent", "Manufacturing", "Construction", "Trade", "Transportation", "Service",
                     "Information", "Finance", "Professional", "Administration", "Miscellaneous",
                     "Mandarin speaker", "Household size", "Household income", 
                     "Mortgage-to-income ratio", "Rent-to-income ratio", "Floor area", 
                     "Government funded housing", "No. of confrontations",
                     "T*Competing candidates", "T*Log constituency size", "T*Log population density", 
                     "T*First-time voters below 35", "T*Non-first-time voters below 35", 
                     "T*First-time voters 35-60", "T*Non-first-time voters 35-60",
                     "T*First-time voters above 61", "T*Female",
                     "T*Pro-democracy incumbent", "T*Manufacturing", "T*Construction", "T*Trade", "T*Transportation", "T*Service",
                     "T*Information", "T*Finance", "T*Professional", "T*Administration", "T*Miscellaneous",
                     "T*Mandarin speaker", "T*Household size", "T*Household income", 
                     "T*Mortgage-to-income ratio", "T*Rent-to-income ratio", "T*Floor area", 
                     "T*Government funded housing", "T*No. of confrontations")

library(stargazer)
filename.tex <- paste("Table3_appendix.tex")
stargazer(lm.ols.nointeract, lm.ols.interact, title="Linear Regression Result", digits = 5,
          dep.var.labels=c("Vote Share of Pro-democracy Candidate"),
          keep.stat="n", ci=TRUE, ci.level=0.90, 
          covariate.labels=covariate.label, single.row=TRUE, out=filename.tex)










